Production over Station and Season
ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle") &
norep_filter_name != "MOTEJ515"),
aes(x = lakesite, y = log10(as.numeric(fraction_bac_abund)), fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
facet_grid(. ~ season) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Log10(Bacterial Cells/mL)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = "bottom", legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)

prod1 <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = frac_bacprod, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod),
position = position_dodge(width = 0.75), width = 0.25) +
facet_grid(. ~ season) +
scale_y_continuous(expand = c(0,0), limits = c(0, 80),
breaks = seq(from = 0, to = 80, by = 15)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Community Production\n(ug C/L/Hr)") +
theme(axis.text.x = element_blank(), #element_text(angle = 30, vjust = 1, hjust = 1),
axis.title.x = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")) #top, right, bottom, and left margins)
prod2 <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = fracprod_per_cell_noinf, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
facet_grid(. ~ season) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Cellular Production \n(ug C/cell/Hr)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
########## LONG FORMAT OF TOTAL PRODUCTIVITY
ggplot(dplyr::filter(metadata, year == "2015" & fraction == "Free"),
aes(x = lakesite, y = tot_bacprod)) +
geom_bar(stat = "identity", color = "black", fill = "grey", position=position_dodge()) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, ymax = tot_bacprod + SD_tot_bacprod),width = 0.25) +
facet_grid(season ~., scales = "free") +
scale_y_continuous(expand = c(0,0), limits = c(0, 80),
breaks = seq(from = 0, to = 80, by = 15)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Community Production\n(ug C/L/Hr)") +
theme(legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")) #top, right, bottom, and left margins)
## Warning: Removed 1 rows containing missing values (geom_bar).
## Warning: Removed 1 rows containing missing values (geom_errorbar).

########## LONG FORMAT OF COMMUNITY PRODUCTIVITY
community_prod_plot_long <-
ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = frac_bacprod, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod),
position = position_dodge(width = 0.75), width = 0.25) +
facet_grid(season ~., scales = "free") +
scale_y_continuous(expand = c(0,0), limits = c(0, 80),
breaks = seq(from = 0, to = 80, by = 15)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Community Production\n(ug C/L/Hr)") +
theme(legend.position = "bottom", legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")) #top, right, bottom, and left margins)
########## LONG FORMAT OF CELLULAR PRODUCTIVITY
percapita_prod_plot_long <-
ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = fracprod_per_cell_noinf, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
facet_grid(season ~ ., scales = "free_x") +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Cellular Production \n(ug C/cell/Hr)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = "bottom", legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
plot_grid(community_prod_plot_long, percapita_prod_plot_long,
labels = c("A", "B"),
nrow = 1, ncol = 2,
align = "v")
## Warning: Removed 1 rows containing missing values (geom_bar).

plot_grid(prod1, prod2, labels = c("A", "B"),
rel_heights = c(1, 1.3),
nrow = 2, ncol = 1,
align = "v")
## Warning: Removed 1 rows containing missing values (geom_bar).

### BY STATION
prod_by_station <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free")),
aes(x = lakesite, y = tot_bacprod)) +
geom_boxplot(fill = "grey") + geom_point(size = 3.5, position = position_jitter()) +
scale_y_continuous(expand = c(0,0), limits = c(0,100)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Total Production (ug C/cell/Hr)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
prod_by_season <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free")),
aes(x = season, y = tot_bacprod)) +
geom_boxplot(fill = "grey") + geom_point(size = 3.5, position = position_jitter()) +
scale_y_continuous(expand = c(0,0), limits = c(0,100)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Season") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
axis.title.y = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
plot_grid(prod_by_station, prod_by_season, labels = c("A", "B"),
rel_widths = c(1, 0.75),
nrow = 1, ncol = 2,
align = "h")

scaled_surface_PAFLA_pruned_rm2 <- scale_reads(surface_PAFL_otu_pruned_notree_rm2)
set.seed(777)
# Calculate the SOREN distance
soren_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = TRUE) # Make it presence/absence
p1 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = soren_whole_dist,
color = "fraction",
shape = "season",
title = "Sørensen") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
theme(legend.position = "none")
# Calculate the BRAY-CURTIS distance
bray_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = FALSE) # Make it Abundance weighted
p2 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = bray_whole_dist ,
color = "fraction",
shape = "season",
title = "Bray-Curtis") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
theme(legend.position = "right", legend.title = element_blank())
plot_grid(p1, p2, labels = c("A", "B"), align = "h", ncol = 2,
rel_widths = c(0.8, 1.2))

# Calculate bray curtis distance matrix
musk_bray <- phyloseq::distance(scaled_surface_PAFLA_pruned_rm2, method = "bray")
# make a data frame from the sample_data
sampledf <- data.frame(sample_data(scaled_surface_PAFLA_pruned_rm2))
# Adonis test
adonis(musk_bray ~ fraction + season , data = sampledf)
##
## Call:
## adonis(formula = musk_bray ~ fraction + season, data = sampledf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## fraction 1 1.3903 1.39032 16.2279 0.30094 0.001 ***
## season 2 1.5161 0.75804 8.8479 0.32816 0.001 ***
## Residuals 20 1.7135 0.08567 0.37089
## Total 23 4.6199 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear Models with Environmental Variables
pca_scores_df <- summary(pca_environ)$sites %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "norep_filter_name") %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1, 4), substr(norep_filter_name, 6, 8), sep = "")) %>%
dplyr::select(-norep_filter_name)
metadata_pca <- metadata %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1, 4), substr(norep_filter_name, 6, 8), sep = "")) %>%
left_join(pca_scores_df, by = "norep_water_name")
##### SINGLE REGRESSION with PC1
## Free living
comm_lm_PC1_free <- summary(lm(frac_bacprod ~ PC1,
data = filter(metadata_pca, fraction == "Free")))
## Particle-associated
comm_lm_PC1_part <- summary(lm(frac_bacprod ~ PC1,
data = filter(metadata_pca, fraction == "Particle")))
## PER CAPITA: Free living
percap_lm_PC1_free <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC1,
data = filter(metadata_pca, fraction == "Free")))
## PER CAPITA: Particle-associated
percap_lm_PC1_part <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC1,
data = filter(metadata_pca, fraction == "Particle")))
# Put the results all together
pca_lm_row1 <- c("Free", "Per-Liter","PC1",
round(comm_lm_PC1_free$adj.r.squared, digits = 3),
round(comm_lm_PC1_free$coefficients[2,4], digits = 3))
pca_lm_row2 <- c("Particle", "Per-Liter","PC1",
round(comm_lm_PC1_part$adj.r.squared, digits = 3),
round(comm_lm_PC1_part$coefficients[2,4], digits = 3))
pca_lm_row3 <- c("Free", "Per-Capita","PC1",
round(percap_lm_PC1_free$adj.r.squared, digits = 3),
round(percap_lm_PC1_free$coefficients[2,4], digits = 3))
pca_lm_row4 <- c("Particle", "Per-Capita", "PC1",
round(percap_lm_PC1_part$adj.r.squared, digits = 3),
round(percap_lm_PC1_part$coefficients[2,4], digits = 3))
pca_lm_results_df <-
rbind(pca_lm_row1, pca_lm_row2, pca_lm_row3, pca_lm_row4)
colnames(pca_lm_results_df) <- c("fraction", "Predictor","Prod_measure", "Adjusted_R2", "p-value")
row.names(pca_lm_results_df) = NULL
pander(pca_lm_results_df,
caption = "Single Linear regression statistics for PC1.")
Single Linear regression statistics for PC1.
| Free |
Per-Liter |
PC1 |
0.06 |
0.221 |
| Particle |
Per-Liter |
PC1 |
-0.075 |
0.641 |
| Free |
Per-Capita |
PC1 |
0.014 |
0.307 |
| Particle |
Per-Capita |
PC1 |
0.006 |
0.33 |
##### SINGLE REGRESSION with PC2
## Free living
comm_lm_PC2_free <- summary(lm(frac_bacprod ~ PC2,
data = filter(metadata_pca, fraction == "Free")))
## Particle-associated
comm_lm_PC2_part <- summary(lm(frac_bacprod ~ PC2,
data = filter(metadata_pca, fraction == "Particle")))
## PER CAPITA: Free living
percap_lm_PC2_free <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC2,
data = filter(metadata_pca, fraction == "Free")))
## PER CAPITA: Particle-associated
percap_lm_PC2_part <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC2,
data = filter(metadata_pca, fraction == "Particle")))
# Put the results all together
pca_lm2_row1 <- c("Free", "Per-Liter","PC2",
round(comm_lm_PC2_free$adj.r.squared, digits = 3),
round(comm_lm_PC2_free$coefficients[2,4], digits = 3))
pca_lm2_row2 <- c("Particle", "Per-Liter","PC2",
round(comm_lm_PC2_part$adj.r.squared, digits = 3),
round(comm_lm_PC2_part$coefficients[2,4], digits = 3))
pca_lm2_row3 <- c("Free", "Per-Capita","PC2",
round(percap_lm_PC2_free$adj.r.squared, digits = 3),
round(percap_lm_PC2_free$coefficients[2,4], digits = 3))
pca_lm2_row4 <- c("Particle", "Per-Capita", "PC2",
round(percap_lm_PC2_part$adj.r.squared, digits = 3),
round(percap_lm_PC2_part$coefficients[2,4], digits = 3))
pca_lm2_results_df <-
rbind(pca_lm2_row1, pca_lm2_row2, pca_lm2_row3, pca_lm2_row4)
colnames(pca_lm2_results_df) <- c("fraction", "Predictor","Prod_measure", "Adjusted_R2", "p-value")
row.names(pca_lm2_results_df) = NULL
pander(pca_lm2_results_df,
caption = "Single Linear regression statistics for PC2.")
Single Linear regression statistics for PC2.
| Free |
Per-Liter |
PC2 |
-0.056 |
0.531 |
| Particle |
Per-Liter |
PC2 |
0.207 |
0.077 |
| Free |
Per-Capita |
PC2 |
0.033 |
0.268 |
| Particle |
Per-Capita |
PC2 |
0.393 |
0.023 |
##### MULTIPLE REGRESSION
## Free living
summary(lm(frac_bacprod ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.792 -10.435 -7.571 12.769 30.622
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.058 5.049 4.765 0.00102 **
## PC1 6.185 4.880 1.268 0.23676
## PC2 -3.262 4.880 -0.669 0.52058
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.49 on 9 degrees of freedom
## Multiple R-squared: 0.1858, Adjusted R-squared: 0.004856
## F-statistic: 1.027 on 2 and 9 DF, p-value: 0.3966
## Particle-associated
summary(lm(frac_bacprod ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0696 -4.7286 -0.1293 3.1707 15.5457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.958 2.198 4.531 0.00142 **
## PC1 1.147 2.124 0.540 0.60246
## PC2 -4.032 2.124 -1.898 0.09014 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.613 on 9 degrees of freedom
## Multiple R-squared: 0.302, Adjusted R-squared: 0.1469
## F-statistic: 1.947 on 2 and 9 DF, p-value: 0.1983
## PER CAPITA: Free living
summary(lm(log10(fracprod_per_cell_noinf) ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Free")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5522 -0.2055 -0.1250 0.2276 0.5222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5750 0.1109 -68.318 1.56e-13 ***
## PC1 0.1176 0.1072 1.097 0.301
## PC2 -0.1269 0.1072 -1.184 0.267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3841 on 9 degrees of freedom
## Multiple R-squared: 0.2245, Adjusted R-squared: 0.05219
## F-statistic: 1.303 on 2 and 9 DF, p-value: 0.3185
## PER CAPITA: Particle-associated
summary(lm(log10(fracprod_per_cell_noinf) ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52230 -0.13916 -0.02677 0.17348 0.63461
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.6647 0.1112 -59.946 6.66e-12 ***
## PC1 0.2069 0.1081 1.913 0.0921 .
## PC2 -0.3653 0.1096 -3.332 0.0103 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3643 on 8 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6253, Adjusted R-squared: 0.5317
## F-statistic: 6.676 on 2 and 8 DF, p-value: 0.01971
PC1_lm_plot <-
ggplot(metadata_pca, aes(x = PC1, y = log10(fracprod_per_cell_noinf),
color = fraction, fill = fraction)) +
geom_point() + ylab("log10(cellular production)") +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm") +
theme(legend.title = element_blank(), legend.position = "bottom")
PC2_lm_plot <-
ggplot(metadata_pca, aes(x = PC2, y = log10(fracprod_per_cell_noinf),
color = fraction, fill = fraction)) +
geom_point() + ylab("log10(cellular production)") +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm") +
theme(legend.title = element_blank(), legend.position = "bottom")
plot_grid(PC2_lm_plot, PC2_lm_plot,
labels = c("A", "B"),
nrow = 1, ncol = 2)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

par(oma(0.1, 0.1, 0.1, 0.1))
## Error in oma(0.1, 0.1, 0.1, 0.1): could not find function "oma"
biplot(pca_environ,
xlab = paste("PC1", paste(round(summary(pca_environ)$cont$importance[2,1]*100, digits = 2), "%", sep = ""), sep = ": "),
ylab = paste("PC2", paste(round(summary(pca_environ)$cont$importance[2,2]*100, digits = 2), "%", sep = ""), sep = ": "))

Figure 1
######################################################### Fraction ABUNDANCe
frac_abund_wilcox <- wilcox.test(log10(as.numeric(fraction_bac_abund)) ~ fraction, data = metadata)
frac_abund_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(as.numeric(fraction_bac_abund)) by fraction
## W = 0, p-value = 1.479e-06
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(as.numeric(fraction_bac_abund)))
## # A tibble: 2 x 2
## fraction `mean(as.numeric(fraction_bac_abund))`
## <fctr> <dbl>
## 1 Particle 41168.88
## 2 Free 734522.25
# Make a data frame to draw significance line in boxplot (visually calculated)
dat1 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(6.45,6.5,6.5,6.45)) # WholePart vs WholeFree
poster_a <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
ylab("Log10(Bacterial Counts) \n (cells/mL)") +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
##### Particle vs free cell abundances
geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=6.5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(frac_abund_wilcox$p.value, digits = 6))) +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
totprod_wilcox <- wilcox.test(frac_bacprod ~ fraction, data = metadata)
totprod_wilcox
##
## Wilcoxon rank sum test
##
## data: frac_bacprod by fraction
## W = 33, p-value = 0.02418
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(frac_bacprod))
## # A tibble: 2 x 2
## fraction `mean(frac_bacprod)`
## <fctr> <dbl>
## 1 Particle 9.958333
## 2 Free 24.058333
# Make a data frame to draw significance line in boxplot (visually calculated)
dat2 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(67,68,68,67)) # WholePart vs WholeFree
poster_b <- ggplot(metadata, aes(y = frac_bacprod, x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
ylab("Community Production \n (μgC/L/hr)") +
##### Particle vs free bulk production
geom_path(data = dat2, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=68, fontface = "bold", size = 3.5, color = "gray40",
label= paste("**\np =", round(totprod_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
percellprod_wilcox <- wilcox.test(log10(fracprod_per_cell) ~ fraction, data = filter(metadata, norep_filter_name != "MOTEJ515"))
percellprod_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(fracprod_per_cell) by fraction
## W = 125, p-value = 6.656e-05
## alternative hypothesis: true location shift is not equal to 0
filter(metadata, norep_filter_name != "MOTEJ515") %>%
group_by(fraction) %>%
summarize(mean(fracprod_per_cell))
## # A tibble: 2 x 2
## fraction `mean(fracprod_per_cell)`
## <fctr> <dbl>
## 1 Particle 4.816116e-07
## 2 Free 3.866798e-08
# Make a data frame to draw significance line in boxplot (visually calculated)
dat3 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(-5.05,-5,-5,-5.05)) # WholePart vs WholeFree
poster_c <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(fracprod_per_cell), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
ylim(c(-8.5, -5)) +
ylab("Log10(Cellular Production) \n(μgC/cell/hr)") +
##### Particle vs free per-cell production
geom_path(data = dat3, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=-5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(percellprod_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######## FIGURE 1
# legend
legend <- get_legend(poster_a)
## Error in f(...): object 'season_shapes' not found
row1_plots <- plot_grid(poster_a + theme(legend.position = "none"), poster_b, poster_c,
labels = c("A", "B", "C"),
ncol = 3, nrow = 1)
## Error in f(...): object 'season_shapes' not found
#fig_1 <-
plot_grid(row1_plots, legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))
## Error in plot_grid(row1_plots, legend, ncol = 1, nrow = 2, rel_heights = c(1, : object 'row1_plots' not found
Figure S1
work_df <- metadata %>%
dplyr::select(norep_filter_name, fraction, fraction_bac_abund, frac_bacprod, fracprod_per_cell_noinf) %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1,4), substr(norep_filter_name, 6,9), sep = "")) %>%
dplyr::select(-norep_filter_name)
part_work_df <- work_df %>%
filter(fraction == "Particle") %>%
rename(part_bacabund = fraction_bac_abund,
part_prod = frac_bacprod,
part_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
free_work_df <- work_df %>%
filter(fraction == "Free") %>%
rename(free_bacabund = fraction_bac_abund,
free_prod = frac_bacprod,
free_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
byfrac_df <- part_work_df %>%
left_join(free_work_df, by = "norep_water_name")
byfrac_df$season <- substr(byfrac_df$norep_water_name, 5,5) # 7th letter = month sampled
byfrac_df$season <- as.character(byfrac_df$season)
byfrac_df$season <- ifelse(byfrac_df$season == "5", "Spring",
ifelse(byfrac_df$season == "7", "Summer",
ifelse(byfrac_df$season == "9", "Fall",
"NA")))
byfrac_df$season <- factor(byfrac_df$season, levels = c("Spring", "Summer", "Fall"))
summary(lm(log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df, norep_water_name != "MOTE515")))
##
## Call:
## lm(formula = log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df,
## norep_water_name != "MOTE515"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52690 -0.08048 0.05903 0.19565 0.35255
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0900 3.2247 0.648 0.533
## log10(free_bacabund) 0.4264 0.5532 0.771 0.461
##
## Residual standard error: 0.3111 on 9 degrees of freedom
## Multiple R-squared: 0.06194, Adjusted R-squared: -0.04229
## F-statistic: 0.5943 on 1 and 9 DF, p-value: 0.4605
plot1 <- ggplot(filter(byfrac_df, norep_water_name != "MOTE515"),
aes(x = log10(free_bacabund), y = log10(part_bacabund))) +
xlab("Free") + ylab("Particle") +
ggtitle("Log10(Bacterial Counts) \n (cells/mL)") +
geom_point(size = 3, fill = "grey", aes(shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
theme(legend.position = "bottom",
legend.title = element_blank())
## Warning: Ignoring unknown parameters: width
lm_prod_corr <- lm(part_prod ~ free_prod, data = byfrac_df)
plot2 <- ggplot(byfrac_df, aes(x = free_prod, y = part_prod)) +
xlab("Free") + ylab("Particle") +
ggtitle("Community Production \n(μgC/L/hr)") +
geom_point(size = 3, fill = "grey", aes(shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
geom_smooth(method = "lm", color = "black") +
annotate("text", x =20, y= 30, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_prod_corr)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none")
## Warning: Ignoring unknown parameters: width
lm_percell_corr <- lm(log10(part_percell_prod) ~ log10(free_percell_prod), data = byfrac_df)
plot3 <- ggplot(byfrac_df,
aes(x = log10(free_percell_prod), y = log10(part_percell_prod))) +
xlab("Free") + ylab("Particle") +
ggtitle("Log10(Cellular Production) \n (μgC/cell/hr)") +
geom_point(size = 3, fill = "grey", aes(shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
geom_smooth(method = "lm", color = "black") +
annotate("text", y = -5.8, x=-7.9, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_corr)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none")
## Warning: Ignoring unknown parameters: width
legend_s1 <- get_legend(plot1)
## Error in f(...): object 'season_shapes' not found
top_row_S1 <- plot_grid(plot1 +theme(legend.position = "none"), plot2, plot3,
nrow = 1, ncol = 3,
labels = c("A", "B", "C"),
align = "h")
## Warning in f(...): restarting interrupted promise evaluation
## Warning in f(...): restarting interrupted promise evaluation
## Error in f(...): object 'season_shapes' not found
plot_grid(top_row_S1, legend_s1,
rel_heights = c(1, 0.05),
nrow = 2, ncol = 1)
## Error in plot_grid(top_row_S1, legend_s1, rel_heights = c(1, 0.05), nrow = 2, : object 'top_row_S1' not found
Linear Regressions with Fraction Diversity
#################################### Bulk Measure Production ####################################
################### Richness ###################
summary(lm(frac_bacprod ~ mean, data = richness)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.057 -12.017 -5.654 9.256 46.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.501168 9.039579 2.047 0.0528 .
## mean -0.003335 0.018911 -0.176 0.8616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.54 on 22 degrees of freedom
## Multiple R-squared: 0.001412, Adjusted R-squared: -0.04398
## F-statistic: 0.0311 on 1 and 22 DF, p-value: 0.8616
# Particle-Associated
lm_prod_vs_rich_PA <- lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_prod_vs_rich_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1271 -2.8865 -0.4649 3.7537 9.8401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.269531 5.717450 -1.971 0.07701 .
## mean 0.038125 0.009868 3.863 0.00314 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.476 on 10 degrees of freedom
## Multiple R-squared: 0.5988, Adjusted R-squared: 0.5587
## F-statistic: 14.93 on 1 and 10 DF, p-value: 0.003143
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_rich_PA <- train(
frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_rich_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.722233 0.6674307 2.204547 0.2884269
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.094 -11.112 -2.755 8.611 33.308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.40793 21.62006 0.250 0.808
## mean 0.05511 0.06207 0.888 0.395
##
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared: 0.07306, Adjusted R-squared: -0.01963
## F-statistic: 0.7882 on 1 and 10 DF, p-value: 0.3955
################### Shannon Entropy ###################
summary(lm(frac_bacprod ~ mean, data = shannon)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = shannon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.659 -11.878 -5.666 9.231 46.593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.63373 26.71652 0.623 0.540
## mean 0.08797 6.22969 0.014 0.989
##
## Residual standard error: 15.55 on 22 degrees of freedom
## Multiple R-squared: 9.064e-06, Adjusted R-squared: -0.04545
## F-statistic: 0.0001994 on 1 and 22 DF, p-value: 0.9889
# Particle-Associated
lm_prod_vs_shannon_PA <- lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_prod_vs_shannon_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9022 -2.9150 -0.5875 1.6713 12.0544
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -40.320 14.007 -2.878 0.01643 *
## mean 11.089 3.068 3.614 0.00473 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.693 on 10 degrees of freedom
## Multiple R-squared: 0.5664, Adjusted R-squared: 0.5231
## F-statistic: 13.06 on 1 and 10 DF, p-value: 0.004734
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_shannon_PA <- train(
frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_shannon_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.743204 0.6698453 2.440644 0.283921
summary(lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.004 -10.708 -3.738 6.632 37.129
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.37 73.87 -0.181 0.860
## mean 9.40 18.50 0.508 0.622
##
## Residual standard error: 18.15 on 10 degrees of freedom
## Multiple R-squared: 0.02516, Adjusted R-squared: -0.07233
## F-statistic: 0.2581 on 1 and 10 DF, p-value: 0.6225
################### Inverse Simpson ###################
summary(lm(frac_bacprod ~ mean, data = invsimps)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.269 -10.298 -4.916 5.866 46.452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.1577 6.0225 2.019 0.0559 .
## mean 0.1629 0.1731 0.941 0.3570
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.25 on 22 degrees of freedom
## Multiple R-squared: 0.03868, Adjusted R-squared: -0.005019
## F-statistic: 0.8851 on 1 and 22 DF, p-value: 0.357
# Particle-Associated samples
lm_prod_vs_invsimps_PA <- lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_prod_vs_invsimps_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5589 -2.1093 -0.1969 0.8752 7.8282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.45199 2.43866 -0.185 0.85666
## mean 0.29344 0.05781 5.076 0.00048 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.571 on 10 degrees of freedom
## Multiple R-squared: 0.7204, Adjusted R-squared: 0.6925
## F-statistic: 25.77 on 1 and 10 DF, p-value: 0.0004804
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_invsimps_PA <- train(
frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_invsimps_PA$results # Cross Validated PA results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 5.257223 0.755187 2.091455 0.2576904
#Free Living Samples
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.765 -9.356 -4.445 6.116 36.017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.0985 16.7052 0.664 0.521
## mean 0.5379 0.6598 0.815 0.434
##
## Residual standard error: 17.8 on 10 degrees of freedom
## Multiple R-squared: 0.06233, Adjusted R-squared: -0.03143
## F-statistic: 0.6648 on 1 and 10 DF, p-value: 0.4339
################### Simpson's Evenness ###################
summary(lm(frac_bacprod ~ mean, data = simpseven)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = simpseven)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.000 -7.163 -3.815 3.392 45.845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8524 9.2286 -0.092 0.9272
## mean 274.1606 134.4253 2.040 0.0536 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.26 on 22 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.1208
## F-statistic: 4.16 on 1 and 22 DF, p-value: 0.05359
# Particle-Associated
lm_prod_vs_simpseven_PA <- lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_prod_vs_simpseven_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.973 -2.229 -1.086 1.356 12.380
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.829 4.539 -0.844 0.41865
## mean 234.057 71.238 3.286 0.00821 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.995 on 10 degrees of freedom
## Multiple R-squared: 0.5191, Adjusted R-squared: 0.471
## F-statistic: 10.79 on 1 and 10 DF, p-value: 0.008211
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_simpseven_PA <- train(
frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_simpseven_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.225165 0.6598027 3.078422 0.3241796
summary(lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.389 -12.029 -3.306 4.668 39.946
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.85 23.52 0.674 0.516
## mean 114.96 321.02 0.358 0.728
##
## Residual standard error: 18.27 on 10 degrees of freedom
## Multiple R-squared: 0.01266, Adjusted R-squared: -0.08607
## F-statistic: 0.1282 on 1 and 10 DF, p-value: 0.7277
#################################### Per-Cell Production ####################################
################### Richness ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8668 -0.2124 0.1045 0.2133 0.6473
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.4591277 0.2212633 -38.231 < 2e-16 ***
## mean 0.0028988 0.0004641 6.246 3.4e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3804 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6501, Adjusted R-squared: 0.6334
## F-statistic: 39.02 on 1 and 21 DF, p-value: 3.395e-06
# Particle-Associated
lm_percell_prod_vs_rich_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_percell_prod_vs_rich_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55347 -0.21545 -0.01066 0.12536 0.58830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.0617648 0.3717671 -21.685 4.44e-09 ***
## mean 0.0023794 0.0006348 3.748 0.00457 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3506 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6095, Adjusted R-squared: 0.5662
## F-statistic: 14.05 on 1 and 9 DF, p-value: 0.004567
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_rich_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_rich_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4259432 0.6442049 0.1528492 0.3200838
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71719 -0.13833 0.07155 0.23581 0.56949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.135758 0.471253 -17.264 8.99e-09 ***
## mean 0.001657 0.001353 1.225 0.249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3859 on 10 degrees of freedom
## Multiple R-squared: 0.1304, Adjusted R-squared: 0.04344
## F-statistic: 1.499 on 1 and 10 DF, p-value: 0.2488
################### Shannon Entropy ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = shannon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.03514 -0.15042 -0.03394 0.26568 0.82794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.9036 0.7534 -14.472 2.14e-12 ***
## mean 0.8805 0.1763 4.993 6.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4348 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5428, Adjusted R-squared: 0.521
## F-statistic: 24.93 on 1 and 21 DF, p-value: 6.09e-05
# Particle-Associated
lm_percell_prod_vs_shannon_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_percell_prod_vs_shannon_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36686 -0.23571 -0.01330 0.03961 0.70312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.8897 0.8820 -11.213 1.37e-06 ***
## mean 0.6993 0.1935 3.614 0.00562 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3584 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5921, Adjusted R-squared: 0.5468
## F-statistic: 13.06 on 1 and 9 DF, p-value: 0.00562
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_shannon_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_shannon_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4165133 0.6923215 0.1364644 0.282022
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72444 -0.17785 0.08114 0.13946 0.67366
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.8666 1.6332 -5.429 0.000289 ***
## mean 0.3243 0.4091 0.793 0.446298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4014 on 10 degrees of freedom
## Multiple R-squared: 0.05914, Adjusted R-squared: -0.03495
## F-statistic: 0.6285 on 1 and 10 DF, p-value: 0.4463
################### Inverse Simpson ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97302 -0.28199 -0.05285 0.32003 0.99088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.86039 0.18153 -43.301 < 2e-16 ***
## mean 0.02355 0.00525 4.485 0.000204 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4596 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4893, Adjusted R-squared: 0.4649
## F-statistic: 20.12 on 1 and 21 DF, p-value: 0.0002037
# Particle-Associated
lm_percell_prod_vs_invsimps_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_percell_prod_vs_invsimps_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28651 -0.18384 -0.11125 0.07337 0.56444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.360961 0.159490 -46.153 5.27e-12 ***
## mean 0.018087 0.003759 4.812 0.000958 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2969 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7201, Adjusted R-squared: 0.689
## F-statistic: 23.15 on 1 and 9 DF, p-value: 0.0009581
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_invsimps_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_invsimps_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.3530999 0.7312218 0.1134866 0.3167412
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68269 -0.11512 0.01325 0.17742 0.63175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.03513 0.35685 -22.517 6.72e-10 ***
## mean 0.01910 0.01409 1.355 0.205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3803 on 10 degrees of freedom
## Multiple R-squared: 0.1551, Adjusted R-squared: 0.07064
## F-statistic: 1.836 on 1 and 10 DF, p-value: 0.2052
################### Simpson's Evenness ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = simpseven)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.06874 -0.41920 0.04553 0.34511 1.56565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4496 0.4116 -18.10 2.74e-14 ***
## mean 4.3470 6.0344 0.72 0.479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6353 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02411, Adjusted R-squared: -0.02236
## F-statistic: 0.5189 on 1 and 21 DF, p-value: 0.4792
# Particle-Associated
lm_percell_prod_vs_simpseven_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_percell_prod_vs_simpseven_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4726 -0.2230 -0.1106 0.1248 0.6887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5941 0.2885 -26.324 7.96e-10 ***
## mean 15.1887 4.6341 3.278 0.00957 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3788 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5441, Adjusted R-squared: 0.4935
## F-statistic: 10.74 on 1 and 9 DF, p-value: 0.009566
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_simpseven_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_simpseven_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4449297 0.6843005 0.1423662 0.3106107
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63112 -0.24216 0.01388 0.14672 0.77426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9274 0.5202 -15.240 3e-08 ***
## mean 4.9361 7.1008 0.695 0.503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4041 on 10 degrees of freedom
## Multiple R-squared: 0.04609, Adjusted R-squared: -0.0493
## F-statistic: 0.4832 on 1 and 10 DF, p-value: 0.5028
R2 table
########## PUT A TABLE TOGETHER
# Per Liter Production
perliter_row1 <- c("Richness", "Per-Liter",
round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_rich_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_rich_PA$results$RsquaredSD, digits = 3))
perliter_row2 <- c("Shannon_Entropy", "Per-Liter",
round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_shannon_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))
perliter_row3 <- c("Inverse_Simpson", "Per-Liter",
round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_invsimps_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))
perliter_row4 <- c("Simpsons_Evenness","Per-Liter",
round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_simpseven_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))
# Per cell production
percell_row1 <- c("Richness", "Per-Cell",
round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_rich_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_rich_PA$results$RsquaredSD, digits = 3))
percell_row2 <- c("Shannon_Entropy", "Per-Cell",
round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_shannon_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))
percell_row3 <- c("Inverse_Simpson", "Per-Cell",
round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_invsimps_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))
percell_row4 <- c("Simpsons_Evenness", "Per-Cell",
round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_simpseven_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))
r2_table <- as.data.frame(rbind(perliter_row1, perliter_row2, perliter_row3, perliter_row4,
percell_row1, percell_row2, percell_row3, percell_row4))
colnames(r2_table) <- c("Diversity_Metric", "Production_Measure","Adjusted_R2","CV_R2", "CV_R2_SD")
row.names(r2_table) = NULL
pander(r2_table,
caption = "Supplemental Table 2: \n R2 estimates for heterotrophic production vs particle-associated diversity linear regressions.")
Supplemental Table 2: R2 estimates for heterotrophic production vs particle-associated diversity linear regressions.
| Richness |
Per-Liter |
0.559 |
0.667 |
0.288 |
| Shannon_Entropy |
Per-Liter |
0.523 |
0.67 |
0.284 |
| Inverse_Simpson |
Per-Liter |
0.692 |
0.755 |
0.258 |
| Simpsons_Evenness |
Per-Liter |
0.471 |
0.66 |
0.324 |
| Richness |
Per-Cell |
0.566 |
0.644 |
0.32 |
| Shannon_Entropy |
Per-Cell |
0.547 |
0.692 |
0.282 |
| Inverse_Simpson |
Per-Cell |
0.689 |
0.731 |
0.317 |
| Simpsons_Evenness |
Per-Cell |
0.493 |
0.684 |
0.311 |
Prepare Figure 2
######################################################### OBSERVED RICHNESS
rich_fraction_wilcox <- wilcox.test(mean ~ fraction, data = richness)
rich_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 129, p-value = 0.0004955
## alternative hypothesis: true location shift is not equal to 0
filter(richness) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 556.7992 167.31404
## 2 Free 338.4242 85.98665
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(920, 930, 930, 920)) # WholePart vs WholeFree
rich_distribution_plot <-
ggplot(richness, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
scale_y_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
xlab("Observed Richness") + xlab("Fraction") +
scale_shape_manual(values = season_shapes) +
geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=790, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(rich_fraction_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Richness vs Per-Liter Heterotrophic Production Plot
prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
annotate("text", x = 790, y=45, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Richness vs Per-cell herterotrophic production Plot
percell_prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 790, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) +
#annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
######################################################### INVERSE SIMPSON
invsimps_fraction_wilcox <- wilcox.test(mean ~ fraction, data = invsimps)
invsimps_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(invsimps) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 35.47659 23.843325
## 2 Free 24.09219 8.136901
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree
invsimps_distribution_plot <-
ggplot(invsimps, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
ylab("Inverse Simpson") + xlab("Fraction") +
geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=80, fontface = "bold", size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# INVERSE SIMPSON
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production
prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
annotate("text", x = 70, y=45, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Inverse Simpson vs per-cell production Plot
percell_prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 70, y=-7.5, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# Plot Inverse simpson plots altogether
invsimps_plots <- plot_grid(invsimps_distribution_plot, prod_vs_invsimps_plot, percell_prod_vs_invsimps_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
Plot Figure 2
# All 3 richness plots together
rich_plots <- plot_grid(rich_distribution_plot, prod_vs_rich_plot,
percell_prod_vs_rich_plot + theme(legend.position = "none"),
labels = c("A", "C", "E"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 1, 1),
align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
invsimps_plots_noyaxis <-
plot_grid(invsimps_distribution_plot + theme(axis.title.y = element_blank(), axis.text.y = element_blank()),
prod_vs_invsimps_plot + theme(axis.title.y = element_blank()),
percell_prod_vs_invsimps_plot + theme(axis.title.y = element_blank(), legend.position = "none"),
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 1, 1),
align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
figure2_row1 <- plot_grid(rich_plots, invsimps_plots_noyaxis,
ncol = 2, nrow = 1, rel_widths = c(1, 0.825),
align = "h")
######## FIGURE 2
plot_grid(figure2_row1, legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))
## Error in ggplot_to_gtable(x): Argument needs to be of class "ggplot" or "gtable"
Prepare Figure S2
######################################################### SHANNON_ENTROPY
shannon_fraction_wilcox <- wilcox.test(mean ~ fraction, data = shannon)
shannon_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 119, p-value = 0.00556
## alternative hypothesis: true location shift is not equal to 0
filter(shannon) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 × 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 4.534078 0.5594638
## 2 Free 3.982314 0.2958221
# Make a data frame to draw significance line in boxplot (visually calculated)
shannon_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(5.8, 5.9, 5.9, 5.8)) # WholePart vs WholeFree
shannon_distribution_plot <-
ggplot(shannon, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
xlab("Shannon Entropy") +
xlab("Fraction") +
# Add line and pval
geom_path(data = shannon_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=5.6, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(shannon_fraction_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Shannon vs Per-Liter Heterotrophic Production Plot
prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
xlab("Shannon Entropy") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
annotate("text", x = 5.25, y=45, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Richness vs Per-cell herterotrophic production Plot
percell_prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Shannon Entropy") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 5.25, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) +
#annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
shannon_plots <- plot_grid(shannon_distribution_plot, prod_vs_shannon_plot, percell_prod_vs_shannon_plot,
labels = c("A", "C", "E"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_errorbar).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
######################################################### INVERSE SIMPSON
simpseven_fraction_wilcox <- wilcox.test(mean ~ fraction, data = simpseven)
simpseven_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 55, p-value = 0.3474
## alternative hypothesis: true location shift is not equal to 0
filter(simpseven) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 × 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 0.05890559 0.02537484
## 2 Free 0.07138806 0.01716014
# Make a data frame to draw significance line in boxplot (visually calculated)
simpseven_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(0.14, 0.15, 0.15, 0.14)) # WholePart vs WholeFree
simpseven_distribution_plot <-
ggplot(simpseven, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
ylab("Simpson's Evenness") +
xlab("Fraction") +
geom_path(data = simpseven_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=0.14, fontface = "bold", size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# SIMPSONS EVENNESS
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production
prod_vs_simpseven_plot <-
ggplot(simpseven, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
ylab("Simpson's Evenness") +
geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
annotate("text", x = 0.14, y=15, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Plot
percell_prod_vs_simpseven_plot <-
ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Simpson's Evenness") +
geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
annotate("text", x = 0.14, y=-6.3, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
simpseven_plots <- plot_grid(simpseven_distribution_plot, prod_vs_simpseven_plot, percell_prod_vs_simpseven_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
Prepare Figure S3
plot_all_rich_percell <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, aes(shape = season), color = "black", fill = "grey") +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Observed Richness") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 790, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$coefficients[,4][2]), digits = 6))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_shannon_percell <-
ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, aes(shape = season), color = "black", fill = "grey") +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Shannon Entropy") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 5.25, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$coefficients[,4][2]), digits = 5))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_invsimps_percell <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, aes(shape = season), color = "black", fill = "grey") +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Inverse Simpson") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 70, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$coefficients[,4][2]), digits = 5))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_simpseven_percell <-
ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, aes(shape = season), color = "black", fill = "grey") +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Simpson's Evenness") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 0.12, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$coefficients[,4][2]), digits = 2))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
figs3_row1 <- plot_grid(plot_all_rich_percell + theme(legend.position = "none"),
plot_all_shannon_percell + theme(axis.title.y = element_blank(), legend.position = "none"),
plot_all_invsimps_percell + theme(axis.title.y = element_blank(), legend.position = "none"),
plot_all_simpseven_percell + theme(axis.title.y = element_blank(), legend.position = "none"),
align = "h", labels = c("A", "B", "C", "D"),
rel_widths = c(1.05, 0.9, 0.9, 0.9),
nrow = 1, ncol = 4)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
######## FIGURE S3
# legend
legend_grey <- get_legend(plot_all_rich_percell)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
plot_grid(figs3_row1, legend_grey,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))

Perform Ridge and Lasso regression
Bulk Community Production: PARTICLE
set.seed(777)
scaled_comm_data <- lasso_data_df_particle_noprod %>%
scale() %>%
as.data.frame()
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
x = model.matrix(frac_bacprod ~ ., scaled_comm_data)[,-1]
y = scaled_comm_data$frac_bacprod
grid = 10^seq(10,-2,length = 100)
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
train <- sample(1:nrow(x), nrow(x)/2)
test <- -train
y_test <- y[test]
################ RIDGE
# Run RIDGE regression with alpha = 0
ridge_divs_train <- glmnet(x[train,], y[train], alpha = 0, lambda = grid, thresh = 1e-12, standardize = TRUE)
par(mfrow = c(1,2))
plot(ridge_divs_train)
# Cross validation
cv_ridge_divs <- cv.glmnet(x[train,], y[train], alpha = 0)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold

best_ridge_lambda <- cv_ridge_divs$lambda.min
ridge_divs_pred <- predict(ridge_divs_train, s = best_ridge_lambda, newx = x[test,])
mean((ridge_divs_pred - y_test)^2) # Test MSE
## [1] 1.122415
## Run ridge on the entire dataset
ridge_divs <- glmnet(x, y, alpha = 0, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(ridge_divs)

################ LASSO
# Run lasso regression with alpha = 1
lasso_divs_train <- glmnet(x[train,], y[train], alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,2))
plot(lasso_divs_train)
# Cross validation
cv_lasso_divs <- cv.glmnet(x[train,], y[train], alpha = 1)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold

best_lasso_lambda <- cv_lasso_divs$lambda.min
lasso_divs_pred <- predict(lasso_divs_train, s = best_lasso_lambda, newx = x[test,])
mean((lasso_divs_pred - y_test)^2)
## [1] 1.405385
## Run lasso on the entire dataset
lasso_divs <- glmnet(x, y, alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(lasso_divs)

plot(lasso_divs, xvar = "lambda", label = TRUE)

plot(lasso_divs, xvar = "dev", label = TRUE)

# What are the lasso coefficients?
predict(lasso_divs, type = "coefficients", s = best_lasso_lambda)
## 35 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -1.499450e-17
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH .
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## perc_attached_bacprod .
## fraction_bac_abund .
## PC1 .
## PC2 .
## PC3 .
## PC4 .
## PC5 .
## PC6 .
## Richness .
## Shannon_Entropy .
## Inverse_Simpson 1.609355e-01
## Simpsons_Evenness .
## unweighted_div .
## weighted_div .
The test MSE for ridge regression is 1.1224147 while the test MSE for lasso is 1.4053855.
Additionally, the lasso model uses Inverse Simpson as the best and only predictor of production!
Per capita Production: PARTICLE
scaled_percapita_data <- percell_lasso_data_df_particle_noprod %>%
dplyr::filter(!is.na(fracprod_per_cell_noinf)) %>%
mutate(log10_percell = log10(fracprod_per_cell_noinf)) %>%
dplyr::select(-c(fracprod_per_cell_noinf)) %>%
scale() %>%
as.data.frame()
set.seed(777)
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
x = model.matrix(log10_percell ~ ., scaled_percapita_data)[,-1]
y = scaled_percapita_data$log10_percell
grid = 10^seq(10,-2,length = 100)
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
train <- sample(1:nrow(x), nrow(x)/2)
test <- -train
y_test <- y[test]
################ RIDGE
# Run RIDGE regression with alpha = 0
ridge_divs_train <- glmnet(x[train,], y[train], alpha = 0, lambda = grid, thresh = 1e-12, standardize = FALSE)
par(mfrow = c(1,2))
plot(ridge_divs_train)
# Cross validation
cv_ridge_divs <- cv.glmnet(x[train,], y[train], alpha = 0)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold

best_ridge_lambda <- cv_ridge_divs$lambda.min
ridge_divs_pred <- predict(ridge_divs_train, s = best_ridge_lambda, newx = x[test,])
mean((ridge_divs_pred - y_test)^2) # Test MSE
## [1] 1.67758
## Run ridge on the entire dataset
ridge_divs <- glmnet(x, y, alpha = 0, lambda = grid, standardize = FALSE)
par(mfrow = c(1,1))
plot(ridge_divs)

################ LASSO
# Run lasso regression with alpha = 1
lasso_divs_train <- glmnet(x[train,], y[train], alpha = 1, lambda = grid, standardize = FALSE)
par(mfrow = c(1,2))
plot(lasso_divs_train)
# Cross validation
cv_lasso_divs <- cv.glmnet(x[train,], y[train], alpha = 1)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold

best_lasso_lambda <- cv_lasso_divs$lambda.min
lasso_divs_pred <- predict(lasso_divs_train, s = best_lasso_lambda, newx = x[test,])
mean((lasso_divs_pred - y_test)^2)
## [1] 1.743155
## Run lasso on the entire dataset
lasso_divs <- glmnet(x, y, alpha = 1, lambda = grid, standardize = FALSE)
par(mfrow = c(1,1))
plot(lasso_divs)

plot(lasso_divs, xvar = "lambda", label = TRUE)

plot(lasso_divs, xvar = "dev", label = TRUE)

# What are the lasso coefficients?
predict(lasso_divs, type = "coefficients", s = best_lasso_lambda)
## 35 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -7.286299e-16
## Sample_depth_m .
## Temp_C -1.436862e-01
## SpCond_uSpercm .
## TDS_mgperL .
## pH -3.377692e-04
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund -7.429931e-03
## attached_bac -3.474949e-02
## perc_attached_abund .
## perc_attached_bacprod .
## fraction_bac_abund .
## PC1 .
## PC2 .
## PC3 .
## PC4 .
## PC5 -4.178542e-02
## PC6 .
## Richness .
## Shannon_Entropy .
## Inverse_Simpson 4.300754e-01
## Simpsons_Evenness .
## unweighted_div .
## weighted_div .
Per-capita Production: ALL SAMPLES
set.seed(777)
scaled_percapita_ALL <- all_dat_lasso_percapita %>%
dplyr::filter(!is.na(fracprod_per_cell_noinf)) %>%
mutate(log10_percell = log10(fracprod_per_cell_noinf)) %>%
dplyr::select(-c(fracprod_per_cell_noinf)) %>%
scale() %>%
as.data.frame()
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
x = model.matrix(log10_percell ~ ., scaled_percapita_ALL)[,-1]
y = scaled_percapita_ALL$log10_percell
grid = 10^seq(10,-2,length = 100)
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
train <- sample(1:nrow(x), nrow(x)/2)
test <- -train
y_test <- y[test]
################ RIDGE
# Run RIDGE regression with alpha = 0
ridge_divs_train <- glmnet(x[train,], y[train], alpha = 0, lambda = grid, thresh = 1e-12, standardize = TRUE)
par(mfrow = c(1,2))
plot(ridge_divs_train)
# Cross validation
cv_ridge_divs <- cv.glmnet(x[train,], y[train], alpha = 0)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold

best_ridge_lambda <- cv_ridge_divs$lambda.min
ridge_divs_pred <- predict(ridge_divs_train, s = best_ridge_lambda, newx = x[test,])
mean((ridge_divs_pred - y_test)^2) # Test MSE
## [1] 1.03235
## Run ridge on the entire dataset
ridge_divs <- glmnet(x, y, alpha = 0, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(ridge_divs)

################ LASSO
# Run lasso regression with alpha = 1
lasso_divs_train <- glmnet(x[train,], y[train], alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,2))
plot(lasso_divs_train)
# Cross validation
cv_lasso_divs <- cv.glmnet(x[train,], y[train], alpha = 1)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold

best_lasso_lambda <- cv_lasso_divs$lambda.min
lasso_divs_pred <- predict(lasso_divs_train, s = best_lasso_lambda, newx = x[test,])
mean((lasso_divs_pred - y_test)^2)
## [1] 0.7946119
## Run lasso on the entire dataset
lasso_divs <- glmnet(x, y, alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(lasso_divs)

plot(lasso_divs, xvar = "lambda", label = TRUE)

plot(lasso_divs, xvar = "dev", label = TRUE)

# What are the lasso coefficients?
predict(lasso_divs, type = "coefficients", s = best_lasso_lambda)
## 35 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 8.540194e-16
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH -1.572133e-01
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL -9.611067e-04
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## perc_attached_bacprod .
## fraction_bac_abund -1.295859e-01
## PC1 .
## PC2 .
## PC3 .
## PC4 .
## PC5 .
## PC6 .
## Richness 3.750210e-01
## Shannon_Entropy .
## Inverse_Simpson .
## Simpsons_Evenness .
## unweighted_div -7.701967e-02
## weighted_div .
Figure 3: Unweighted ses.mpd
######################################################### SES MPD DISTRIBUTION: UNWEIGHTED
unweighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = unweighted_df)
unweighted_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 30, p-value = 0.01449
## alternative hypothesis: true location shift is not equal to 0
unweighted_df %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))
## # A tibble: 2 x 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.4970703
## 2 Free 0.4375166
# Make a data frame to draw significance line in boxplot (visually calculated)
dat4 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
unweight_distribution_plot <-
ggplot(unweighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat4, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.35, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(unweighted_fraction_wilcox$p.value, digits = 2))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(mean ~ mpd.obs.z, data = unweighted_df))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -195.66 -96.12 -14.17 80.15 295.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 443.89 28.29 15.690 1.98e-13 ***
## mpd.obs.z -124.90 34.37 -3.634 0.00147 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 138.5 on 22 degrees of freedom
## Multiple R-squared: 0.3751, Adjusted R-squared: 0.3467
## F-statistic: 13.2 on 1 and 22 DF, p-value: 0.001467
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -191.75 -112.16 -41.91 55.88 278.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 515.16 51.16 10.069 1.49e-06 ***
## mpd.obs.z -83.76 49.91 -1.678 0.124
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 155 on 10 degrees of freedom
## Multiple R-squared: 0.2198, Adjusted R-squared: 0.1417
## F-statistic: 2.816 on 1 and 10 DF, p-value: 0.1242
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -100.15 -66.22 -18.72 43.66 172.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 337.075 42.750 7.885 1.34e-05 ***
## mpd.obs.z 3.083 77.507 0.040 0.969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90.18 on 10 degrees of freedom
## Multiple R-squared: 0.0001582, Adjusted R-squared: -0.09983
## F-statistic: 0.001583 on 1 and 10 DF, p-value: 0.9691
unweight_rich_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = mean, x = mpd.obs.z)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, aes(fill = fraction, shape = season)) + ylab("Observed Richness") +
scale_shape_manual(values = season_shapes) +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=750, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between Production and Unweighted Mean Pairwise distance?
#summary(lm(frac_bacprod ~ mpd.obs.z, data = unweighted_df)) # NS
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.144 -4.312 -1.822 3.403 19.527
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.213 2.617 3.139 0.0105 *
## mpd.obs.z -3.510 2.553 -1.375 0.1991
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.928 on 10 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.07492
## F-statistic: 1.891 on 1 and 10 DF, p-value: 0.1991
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.003 -14.934 2.196 8.756 36.997
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.183 8.396 2.166 0.0556 .
## mpd.obs.z 13.428 15.223 0.882 0.3984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.71 on 10 degrees of freedom
## Multiple R-squared: 0.07219, Adjusted R-squared: -0.02059
## F-statistic: 0.7781 on 1 and 10 DF, p-value: 0.3984
unweight_prod_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, aes(shape = season)) +
scale_shape_manual(values = season_shapes) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-CELL PRODUCTION and Unweighted Mean Pairwise distance?
unweight_vs_percell <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
summary(unweight_vs_percell)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7226 -0.2897 0.0040 0.1294 1.3193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.1988 0.0999 -72.057 < 2e-16 ***
## mpd.obs.z -0.4972 0.1205 -4.127 0.00048 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4779 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4478, Adjusted R-squared: 0.4215
## F-statistic: 17.03 on 1 and 21 DF, p-value: 0.0004795
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37972 -0.24190 -0.16526 0.04437 1.18153
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.9247 0.1711 -40.472 1.71e-11 ***
## mpd.obs.z -0.3298 0.1627 -2.027 0.0733 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4649 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3135, Adjusted R-squared: 0.2372
## F-statistic: 4.109 on 1 and 9 DF, p-value: 0.07327
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63981 -0.29782 0.07682 0.17625 0.76499
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.55633 0.19602 -38.55 3.29e-12 ***
## mpd.obs.z -0.04279 0.35539 -0.12 0.907
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4135 on 10 degrees of freedom
## Multiple R-squared: 0.001447, Adjusted R-squared: -0.09841
## F-statistic: 0.01449 on 1 and 10 DF, p-value: 0.9066
unweight_percell_vs_mpd_plot <-
ggplot(unweighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, aes(fill = fraction, shape = season)) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
geom_smooth(method = "lm", color="#424645", fill = "#424645", alpha = 0.3) +
#stat_smooth(method="lm", se=TRUE, formula= y ~ poly(x, 2, raw=TRUE), color="#424645", fill = "#424645", alpha = 0.3) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=-6, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(unweight_vs_percell)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(unweight_vs_percell)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
######## FIGURE 3
figure3_row1 <- plot_grid(unweight_distribution_plot, unweight_rich_vs_mpd_plot, unweight_prod_vs_mpd_plot,
unweight_percell_vs_mpd_plot + theme(legend.position = "none"),
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 1, 1, 1),
align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
plot_grid(figure3_row1, legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))
## Error in ggplot_to_gtable(x): Argument needs to be of class "ggplot" or "gtable"
Figure S6: Weighted ses.mpd
######################################################### SES MPD DISTRIBUTION: WEIGHTED
weighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = weighted_df)
weighted_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(weighted_df) %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))
## # A tibble: 2 x 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.3607944
## 2 Free -0.3854885
# Make a data frame to draw significance line in boxplot (visually calculated)
dat5 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
weight_distribution_plot <-
ggplot(weighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
geom_jitter(size = 3.5, aes(fill = fraction, shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat5, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.55, fontface = "bold", size = 3.5, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Is there a relationship between inverse simpson and Weighted Mean Pairwise distance?
#summary(lm(mean ~ mpd.obs.z, data = weighted_df)) # NS
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.42 -19.15 0.40 12.95 40.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.58 12.61 1.870 0.0911 .
## mpd.obs.z -32.98 29.43 -1.121 0.2886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.57 on 10 degrees of freedom
## Multiple R-squared: 0.1116, Adjusted R-squared: 0.02276
## F-statistic: 1.256 on 1 and 10 DF, p-value: 0.2886
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0597 -4.3465 -0.8155 3.8832 18.4294
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.187 4.720 4.701 0.00084 ***
## mpd.obs.z -4.942 10.486 -0.471 0.64753
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.441 on 10 degrees of freedom
## Multiple R-squared: 0.02173, Adjusted R-squared: -0.0761
## F-statistic: 0.2221 on 1 and 10 DF, p-value: 0.6475
weight_invsimps_vs_mpd_plot <-
ggplot(weighted_df, aes(y = mean, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3.5, aes(shape = season)) + ylab("Inverse Simpson") +
scale_shape_manual(values = season_shapes) +
#xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-LITER PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(frac_bacprod ~ mpd.obs.z, data = weighted_df))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.030 -10.236 -2.842 5.937 38.409
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.008 5.664 1.590 0.126
## mpd.obs.z -21.439 12.889 -1.663 0.110
##
## Residual standard error: 14.66 on 22 degrees of freedom
## Multiple R-squared: 0.1117, Adjusted R-squared: 0.07133
## F-statistic: 2.767 on 1 and 22 DF, p-value: 0.1104
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.240 -4.575 -1.717 4.503 15.352
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.391 4.126 1.064 0.312
## mpd.obs.z -15.432 9.627 -1.603 0.140
##
## Residual standard error: 7.711 on 10 degrees of freedom
## Multiple R-squared: 0.2044, Adjusted R-squared: 0.1249
## F-statistic: 2.569 on 1 and 10 DF, p-value: 0.14
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.790 -12.275 -3.481 7.360 30.573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.697 9.683 1.518 0.160
## mpd.obs.z -24.285 21.512 -1.129 0.285
##
## Residual standard error: 17.32 on 10 degrees of freedom
## Multiple R-squared: 0.113, Adjusted R-squared: 0.02434
## F-statistic: 1.274 on 1 and 10 DF, p-value: 0.2853
weight_prod_vs_mpd_plot <-
ggplot(weighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3.5, aes(shape = season)) +
scale_shape_manual(values = season_shapes) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-CELL PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.94765 -0.40616 -0.03619 0.31885 1.39101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5352 0.2442 -30.860 <2e-16 ***
## mpd.obs.z -0.9519 0.5446 -1.748 0.0951 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6009 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.127, Adjusted R-squared: 0.08544
## F-statistic: 3.055 on 1 and 21 DF, p-value: 0.09508
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65329 -0.32632 -0.03486 0.22224 0.95307
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.0845 0.3015 -23.496 2.18e-09 ***
## mpd.obs.z -0.9337 0.6753 -1.383 0.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5096 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1752, Adjusted R-squared: 0.08357
## F-statistic: 1.912 on 1 and 9 DF, p-value: 0.2001
lm_percell_free_mpd <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free"))
summary(lm_percell_free_mpd)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54189 -0.16159 -0.00204 0.20070 0.44618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9519 0.1848 -43.019 1.11e-12 ***
## mpd.obs.z -0.9777 0.4107 -2.381 0.0386 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3306 on 10 degrees of freedom
## Multiple R-squared: 0.3617, Adjusted R-squared: 0.2979
## F-statistic: 5.667 on 1 and 10 DF, p-value: 0.03857
weight_percell_vs_mpd_plot <-
ggplot(weighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z, fill = fraction)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", data = filter(weighted_df, fraction == "Free"), color = "skyblue", fill = "skyblue", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
annotate("text", x = 0.3, y=-7.9, color = "skyblue", fontface = "bold",
label = paste("R2 =", round(summary(lm_percell_free_mpd)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_free_mpd)$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_grid(weight_distribution_plot, weight_invsimps_vs_mpd_plot, weight_prod_vs_mpd_plot, weight_percell_vs_mpd_plot,
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 0.8, 0.8, 1.2),
align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Cell counts and Unweighted SESmpd
# Combine the datasets into
cell_nums <- otu_alphadiv %>%
dplyr::select(norep_filter_name, fraction_bac_abund) %>%
distinct()
unweight_cellnums <- cell_nums %>%
left_join(unweighted_df, by = "norep_filter_name") %>%
dplyr::filter(norep_filter_name != "MOTEJ515")
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining character vector and factor, coercing into character vector
# Is there a relationship between cell numbers and Unweighted Mean Pairwise distance?
summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums)))
##
## Call:
## lm(formula = log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4107 -0.1703 0.1661 0.3388 0.7078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2637 0.1119 47.041 < 2e-16 ***
## mpd.obs.z 0.5256 0.1349 3.895 0.000835 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5352 on 21 degrees of freedom
## Multiple R-squared: 0.4194, Adjusted R-squared: 0.3917
## F-statistic: 15.17 on 1 and 21 DF, p-value: 0.0008354
summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60108 -0.11615 0.06289 0.23310 0.34729
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.61321 0.11608 39.742 2.01e-11 ***
## mpd.obs.z 0.06373 0.11039 0.577 0.578
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3154 on 9 degrees of freedom
## Multiple R-squared: 0.03572, Adjusted R-squared: -0.07143
## F-statistic: 0.3334 on 1 and 9 DF, p-value: 0.5778
summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums, fraction == "Free")))
##
## Call:
## lm(formula = log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.294515 -0.070928 -0.008011 0.116064 0.216553
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.76213 0.08022 71.829 6.67e-15 ***
## mpd.obs.z 0.16564 0.14544 1.139 0.281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1692 on 10 degrees of freedom
## Multiple R-squared: 0.1148, Adjusted R-squared: 0.02629
## F-statistic: 1.297 on 1 and 10 DF, p-value: 0.2813
ggplot(unweight_cellnums, aes(y = log10(fraction_bac_abund), x = mpd.obs.z)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21, aes(fill = fraction)) +
ylab("log10(bacterial cells/mL)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=4.25, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = unweight_cellnums))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = unweight_cellnums))$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position = c(0.12, 0.9))

Figure S7
# Test by station
part_unweighted_df <- filter(unweighted_df, fraction == "Particle")
kruskal.test(mpd.obs.z ~ lakesite, data = filter(unweighted_df, fraction == "Particle"))
##
## Kruskal-Wallis rank sum test
##
## data: mpd.obs.z by lakesite
## Kruskal-Wallis chi-squared = 5.8718, df = 3, p-value = 0.118
unweighted_df %>%
filter(fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mpd.obs.z), sd(mpd.obs.z))
## # A tibble: 4 x 3
## lakesite `mean(mpd.obs.z)` `sd(mpd.obs.z)`
## <fctr> <dbl> <dbl>
## 1 Outlet 0.7950222 0.5856743
## 2 Deep -0.9216060 0.3564753
## 3 Bear -0.7817952 0.9425671
## 4 River -1.0799021 0.2412376
# Lakesite
plot_part_unweight_lakesite <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = lakesite)) +
ggtitle("Particle-Associated Samples Only") +
scale_fill_manual(values = lakesite_colors) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
geom_jitter(size = 3, shape = 21, aes(fill = lakesite), width = 0.2) +
geom_boxplot(aes(fill = lakesite), alpha = 0.5) +
theme(axis.title.x = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank())
# Season
plot_part_unweight_season <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = season)) +
ggtitle("Particle-Associated Samples Only") +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = season_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = season), width = 0.2) +
geom_boxplot(aes(fill = season), alpha = 0.5) +
theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
legend.position = c(0.9, 0.9), legend.title = element_blank())
plot_grid(plot_part_unweight_lakesite, plot_part_unweight_season,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")

plot_station_rich <-
ggplot(filter(otu_alphadiv, measure == "Richness" & fraction == "Particle"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ylab("Observed Richness") +
ggtitle("Particle Fraction Only") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = c(0.12, 0.9))
otu_alphadiv %>%
filter(measure == "Richness" & fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 4 x 3
## lakesite `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Outlet 430.8567 81.99728
## 2 Deep 472.0833 23.48010
## 3 Bear 637.9933 242.53768
## 4 River 686.2633 135.20325
plot_station_invsimps <-
ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ggtitle("Particle Fraction Only") +
ylab("Inverse Simpson") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = "none")
otu_alphadiv %>%
filter(measure == "Inverse_Simpson" & fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mean))
## # A tibble: 4 x 2
## lakesite `mean(mean)`
## <fctr> <dbl>
## 1 Outlet 23.66717
## 2 Deep 23.76370
## 3 Bear 35.36997
## 4 River 59.10553
plot_grid(plot_station_rich, plot_station_invsimps,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")

#### FREE LIVING
plot_station_rich_FL <-
ggplot(filter(otu_alphadiv, measure == "Richness" & fraction == "Free"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ylab("Observed Richness") +
ggtitle("Free-Living Fraction Only") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = c(0.12, 0.9))
otu_alphadiv %>%
filter(measure == "Richness" & fraction == "Free") %>%
group_by(lakesite) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 4 x 3
## lakesite `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Outlet 297.5467 74.24756
## 2 Deep 313.9267 58.54588
## 3 Bear 293.9033 55.85800
## 4 River 448.3200 64.10530
plot_station_invsimps_FL <-
ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Free"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ggtitle("Free-Living Fraction Only") +
ylab("Inverse Simpson") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = "none")
otu_alphadiv %>%
filter(measure == "Inverse_Simpson" & fraction == "Free") %>%
group_by(lakesite) %>%
summarize(mean(mean))
## # A tibble: 4 x 2
## lakesite `mean(mean)`
## <fctr> <dbl>
## 1 Outlet 23.87979
## 2 Deep 22.42163
## 3 Bear 19.06537
## 4 River 31.00198
plot_grid(plot_station_rich_FL, plot_station_invsimps_FL,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")

Plot Figure S5: Randomized Richness
# Read in the tree
randomized_tree <- read.tree(file = "data/PhyloTree/randomized/newick_tree_randomized_rmN.tre")
#random_physeq_presab_pruned_tree <- merge_phyloseq(random_physeq_presab_pruned, randomized_tree)
#random_physeq_presab_pruned_tree
#save(list="random_physeq_presab_pruned_tree", file=paste0("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData"))
load("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData")
random_physeq_presab_pruned_tree
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2911 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 2911 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2911 tips and 2909 internal nodes ]
# First force the OTU
randomized_otu <- matrix(otu_table(random_physeq_presab_pruned_tree), nrow = nrow(otu_table(random_physeq_presab_pruned_tree)))
rownames(randomized_otu) <- sample_names(random_physeq_presab_pruned_tree)
colnames(randomized_otu) <- taxa_names(random_physeq_presab_pruned_tree)
## Calculate input for SES_MPD
# Convert the presence/absence data to standardized abundanced vegan function `decostand' , NOTE: method = "pa"
otu_decostand <- decostand(randomized_otu, method = "pa")
# check total abundance in each sample
apply(otu_decostand, 1, sum)
## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915
## 906 574 434 268 256 358 493 447 476 276 284 381 840 632 586 452 383 511 505 343 444 274 238 381
# check for mismatches/missing species between community data and phylo tree
randomized_matches <- match.phylo.comm(randomized_tree, otu_decostand)
# the resulting object is a list with $phy and $comm elements. replace our
# original data with the sorted/matched data
phy_randomized_rm2 <- randomized_matches$phy
comm_randomized_rm2 <- randomized_matches$comm
# Calculate the phylogenetic distances
phy_dist_randomized_rm2 <- cophenetic(phy_randomized_rm2)
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_randomized <- ses.mpd(comm_randomized_rm2, phy_dist_randomized_rm2, null.model = "independentswap",
abundance.weighted = FALSE, runs = 999)
df <- unweighted_sesMPD_indepswap_randomized
df$names <- row.names(df)
df_2 <- make_metadata_norep(df) %>%
mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(ntaxa ~ mpd.obs.z, data = df_2))
##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = df_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -212.74 -129.44 -12.54 53.58 468.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 448.27 35.48 12.633 1.47e-11 ***
## mpd.obs.z 56.32 94.24 0.598 0.556
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 173.7 on 22 degrees of freedom
## Multiple R-squared: 0.01598, Adjusted R-squared: -0.02875
## F-statistic: 0.3572 on 1 and 22 DF, p-value: 0.5562
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Particle")))
##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -203.87 -109.15 -44.96 44.29 341.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 557.62 50.67 11.006 6.56e-07 ***
## mpd.obs.z -33.22 140.62 -0.236 0.818
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 175 on 10 degrees of freedom
## Multiple R-squared: 0.00555, Adjusted R-squared: -0.0939
## F-statistic: 0.05581 on 1 and 10 DF, p-value: 0.818
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Free")))
##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -107.76 -48.80 -17.85 56.29 159.66
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 342.48 24.62 13.913 7.19e-08 ***
## mpd.obs.z 74.88 62.79 1.193 0.261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 84.48 on 10 degrees of freedom
## Multiple R-squared: 0.1245, Adjusted R-squared: 0.03699
## F-statistic: 1.422 on 1 and 10 DF, p-value: 0.2605
ggplot(df_2, aes(y = ntaxa, x = mpd.obs.z, fill = fraction)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) + ylab("Randomized Richness") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1,1)) +
theme(legend.title = element_blank(), legend.position = c(0.15, 0.9))

Plot: Temp vs Production
ggplot(lasso_data_df, aes(x = Temp_C, y = frac_bacprod, color = fraction)) +
geom_point(size = 3.5, aes(shape = season, fill = fraction), color = "black", guide = FALSE) +
xlab("Water Temperature (C)") + ylab("Bulk Community Production (ug C/L/Hr)") +
scale_fill_manual(values = fraction_colors, guide = 'none') +
scale_color_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
geom_smooth(method = "lm", fill = NA, lty = 2, aes(color = fraction)) +
theme(legend.position = c(0.5, 0.95), legend.direction = "horizontal",
legend.title = element_blank())
## Warning: Ignoring unknown parameters: guide

free_data_df <- dplyr::filter(lasso_data_df, fraction == "Free")
summary(lm(frac_bacprod ~ Temp_C, data = free_data_df))
##
## Call:
## lm(formula = frac_bacprod ~ Temp_C, data = free_data_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.448 -11.678 -3.663 6.226 35.044
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.3131 27.3683 1.510 0.162
## Temp_C -0.9054 1.4099 -0.642 0.535
##
## Residual standard error: 18.02 on 10 degrees of freedom
## Multiple R-squared: 0.03961, Adjusted R-squared: -0.05643
## F-statistic: 0.4124 on 1 and 10 DF, p-value: 0.5352
particle_data_df <- dplyr::filter(lasso_data_df, fraction == "Particle")
summary(lm(frac_bacprod ~ Temp_C, data = particle_data_df))
##
## Call:
## lm(formula = frac_bacprod ~ Temp_C, data = particle_data_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.775 -4.172 -1.356 3.410 15.237
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.6913 11.3062 2.715 0.0218 *
## Temp_C -1.0879 0.5825 -1.868 0.0913 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.444 on 10 degrees of freedom
## Multiple R-squared: 0.2586, Adjusted R-squared: 0.1845
## F-statistic: 3.489 on 1 and 10 DF, p-value: 0.09133